Heatmap function

create_gene_heatmap <- function(de_results_df, dge_object, font_size = 12, num_genes = 20) {
  # Subset the genes
  top_genes <- de_results_df$Symbol[1:num_genes]
  subset_matrix <- dge_object$logCPM[top_genes, colnames(dge_object$counts)]
  
  # Create annotation dataframe
  sample_info <- dge_object$samples
  annotation_df <- data.frame(
    Dose = sample_info$Dose
  )
  rownames(annotation_df) <- colnames(subset_matrix)
  
  # Reorder gene expression matrix
  ordered_samples <- order(sample_info$Dose)
  subset_matrix <- subset_matrix[, ordered_samples]
  sample_info <- sample_info[ordered_samples, ]
  
  # Create the heatmap
  heatmap <- pheatmap(subset_matrix,
           scale = "row",
           cluster_rows = FALSE,
           cluster_cols = FALSE,
           color = viridis(50),
           show_colnames = FALSE,
           annotation_col = annotation_df,
           fontsize = font_size,
           silent = FALSE)
}

Recap

These are the first experiments where I use version 2 chemistry. Read 2 is now the barcode and UMI. Read 1 is the cDNA. This was done to avoid reading through the TSO on Ilumina read 1

This sequencing run consists of 3 experiments:

Test version 2 protocol with a biological application from Zac Moore of Brain Cancer lab https://au-mynotebook.labarchives.com/share/Piper_Research_Project/MzEuMjAwMDAwMDAwMDAwMDAzfDE3MTc2NS8yNC9UcmVlTm9kZS8zMzczNDM0NjE1fDc5LjE5OTk5OTk5OTk5OTk5

Notebook aim

Differential expression testing. Focus on the timepoints that show the most dynamic changes.
Keep the 10uM dose becuase in notebook 3B this caused moderate effects.

Read SCE and extract col data

This object was generated in 2A_Clustering.
Subset only 10uM dose for simplicity.

sce <- readRDS(here::here(
  "data/TIRE_brain_human/SCEs/", "brainCancer_subset.sce.rds"
))

dge <- scran::convertTo(sce, type="edgeR")

dose <- c(0, 1.00e-05)
dge <- dge[,dge$samples$Dose_M %in% dose]

tb <- as_tibble(dge$samples)

Convert the numerical dose column to a factor. Keep the 10uM dose becuase in notebook 3B this caused moderate effects

tb$Dose <- as.factor(tb$Dose_M)
tb$Dose <- recode(tb$Dose,
                  "0" = "DMSO",
                  "1e-05" = "10uM")
dge$samples$Dose <- tb$Dose

dge <- dge[,dge$samples$Dose %in% c("DMSO", "10uM")]
dge$samples$Dose <- droplevels(dge$samples$Dose)

Create a new factor for time and dose.
This is simpler than an interaction term in the design matrix.

dge$samples$Dose_Time <- paste(dge$samples$Dose, dge$samples$Day_Exposure, sep="_d")

Differential expression testing

Have a look at the important metadata.

tb %>% 
  dplyr::count(Drug, Day_Exposure, Dose) %>% 
  arrange(Day_Exposure, Dose)

Remove genes that are lowly expressed. 11,000 genes are kept

keep <- filterByExpr(dge, group=dge$samples$Drug, min.count=1)
dge <- dge[keep,]
summary(keep)
##    Mode   FALSE    TRUE 
## logical   13030   11222

Correct for composition biases by computing normalization factors with the trimmed mean of M-values method.

dge <- calcNormFactors(dge)

Perform differential expression testing

Model matrix generation.

design <- model.matrix(~0 + Dose_Time, dge$samples)
head(design)
##            Dose_Time10uM_d3 Dose_Time10uM_d5 Dose_Time10uM_d7 Dose_TimeDMSO_d3
## ACGCCTATCG                0                0                0                0
## ATAGAACCGC                0                0                0                0
## ATCAAGAACG                1                0                0                0
## ATCGCGTTGT                0                1                0                0
## CAATCACTGA                0                0                0                0
## CCAAGACGTG                1                0                0                0
##            Dose_TimeDMSO_d5 Dose_TimeDMSO_d7
## ACGCCTATCG                0                1
## ATAGAACCGC                1                0
## ATCAAGAACG                0                0
## ATCGCGTTGT                0                0
## CAATCACTGA                1                0
## CCAAGACGTG                0                0
dge <- estimateDisp(dge, design)
summary(dge$trended.dispersion)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01654 0.01741 0.01927 0.02051 0.02352 0.03156

Create the contrast matrix. Foucs on the largest changes.

contr.matrix <- makeContrasts(
   day5_vs_day3 = Dose_Time10uM_d5 - Dose_Time10uM_d3,
   day7_vs_day3 = Dose_Time10uM_d7 - Dose_Time10uM_d3,
    day7_vs_day5 = Dose_Time10uM_d7 - Dose_Time10uM_d5,
   levels = colnames(design))

In each contrast, the format is A - B where:

  • A represents the condition considered as the “treatment” or point of interest
  • B represents the condition considered as the “control” or baseline

So the baseline is always the earlier timepoint

Fit BCV

par(mfrow=c(1,2))
v <- voom(dge, design, plot=TRUE)

vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)
plotSA(efit, main="Final model: Mean-variance trend")

Differential expression analysis

Check how many genes are differentially expressed.
There are the most at the ealrier timepoint so we focus on day 5 vs day 3

summary(decideTests(efit))
##        day5_vs_day3 day7_vs_day3 day7_vs_day5
## Down            115           76            0
## NotSig        11014        11061        11222
## Up               93           85            0

Visualise differentially expressed genes

Day 5 vs day 3

day5 <- topTable(efit, coef=1, n=length(efit$genes$ID), sort.by = "logFC")
results <- as_tibble(day5)
results$ID <- rownames(day5)

# add a column of NAs
results$DElabel <- "n/s"
# if log2Foldchange > 1 or < -1 and pvalue < 0.05, set as "UP"
results$DElabel[results$logFC > 1 & results$adj.P.Val < 0.1] <- "Day5"
results$DElabel[results$logFC < -1 & results$adj.P.Val < 0.1] <- "Day3"

Add gene labels

results$genelabels <- ""
results$genelabels[results$Symbol == "CDCA5"] <- "CDCA5"
results$genelabels[results$Symbol == "PBX1"] <- "PBX1"
results$genelabels[results$Symbol == "LDLR"] <- "LDLR"
results$genelabels[results$Symbol == "EXO1"] <- "EXO1"
results$genelabels[results$Symbol == "SPON1"] <- "SPON1"
results$genelabels[results$Symbol == "ABCA5"] <- "ABCA5"
results$genelabels[results$Symbol == "MRPS6"] <- "MRPS6"
results$genelabels[results$Symbol == "NEAT1"] <- "NEAT1"

Volcano

results$DElabel <- factor(results$DElabel, levels = c("Day5", "n/s", "Day3"))  # Adjust as per your actual labels

plt1 <- ggplot(data = results, aes(x = logFC, y = -log10(adj.P.Val), colour = DElabel, label = genelabels)) + 
  geom_point(alpha = 0.33, size = 1.5) +
  geom_text_repel(size = 4, colour = "black") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  scale_color_manual(
    values = c("darkorange", "grey", "darkblue"),  # Colors in the desired order
    name = "Upregulated",
    labels = c("Day5", "n/s", "Day3")  # Optional: Add custom labels
  ) +
  geom_vline(xintercept = 1, linetype = "dotted") + 
  geom_vline(xintercept = -1, linetype = "dotted") +
  theme_Publication()

plt1

Gene set testing with camera

Need to convert geneIDs from ensembl to enterez

geneids <- as.data.frame(v$genes$ID)
colnames(geneids) <- "ENSEMBL"

geneids$entrez <- mapIds(org.Hs.eg.db, keys = geneids$ENSEMBL, keytype = "ENSEMBL", column = "ENTREZID")

The things that go down are the typical cell cycle arrest stuff. What goes up is more interesting in the ANGIOGENESIS and protein secretion.

load("data/MSigDB/human_H_v5p2.rdata")
idx <- ids2indices(Hs.H,identifiers = geneids$entrez)
cam.day5_3<- camera(v,idx,design,contrast=contr.matrix[,1])
head(cam.day5_3,10)

Visualize the gene set testing.

par(mfrow=c(1,1))

barcodeplot(efit$t[,1], index=idx$HALLMARK_ANGIOGENESIS,
            index2 = idx$HALLMARK_E2F_TARGETS)
(top)HALLMARK_ANGIOGENESIS 
 (bottom)HALLMARK_E2F_TARGETS

(top)HALLMARK_ANGIOGENESIS (bottom)HALLMARK_E2F_TARGETS

Visualize as a barplot

geom_GeneSet_Barchart(cam.day5_3)

day 7 vs day 3

day7 <- topTable(efit, coef=2, n=length(efit$genes$ID), sort.by = "logFC")
results <- as_tibble(day7)
results$ID <- rownames(day7)

# add a column of NAs
results$DElabel <- "n/s"
# if log2Foldchange > 1 or < -1 and pvalue < 0.05, set as "UP"
results$DElabel[results$logFC > 1 & results$adj.P.Val < 0.1] <- "Day7"
results$DElabel[results$logFC < -1 & results$adj.P.Val < 0.1] <- "Day3"

Add gene labels

results$genelabels <- ""
results$genelabels[results$Symbol == "PBX1"] <- "PBX1"
results$genelabels[results$Symbol == "RPH3A"] <- "RPH3A"
results$genelabels[results$Symbol == "KIAA0391"] <- "KIAA0391"
results$genelabels[results$Symbol == "CENPP"] <- "CENPP"
results$genelabels[results$Symbol == "CHTF18"] <- "CHTF18"
results$genelabels[results$Symbol == "FOSL2"] <- "FOSL2"

Volcano

results$DElabel <- factor(results$DElabel, levels = c("Day7", "n/s", "Day3"))  # Adjust as per your actual labels

plt2 <- ggplot(data = results, aes(x = logFC, y = -log10(adj.P.Val), colour = DElabel, label = genelabels)) + 
  geom_point(alpha = 0.33, size = 1.5) +
  geom_text_repel(size = 4, colour = "black") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  scale_color_manual(
    values = c("darkorange", "grey", "darkblue"),  # Colors in the desired order
    name = "Upregulated",
    labels = c("Day7", "n/s", "Day3")  # Optional: Add custom labels
  ) +
  geom_vline(xintercept = 1, linetype = "dotted") + 
  geom_vline(xintercept = -1, linetype = "dotted") +
  theme_Publication()

plt2

Gene set testing with camera

The things that go down are the typical cell cycle arrest stuff. What goes up is more interesting in the ANGIOGENESIS and hedgehog signalling.

cam.day7_3<- camera(v,idx,design,contrast=contr.matrix[,2])
head(cam.day7_3,10)

Visualize the gene set testing.

par(mfrow=c(1,1))

barcodeplot(efit$t[,1], index=idx$HALLMARK_HEDGEHOG_SIGNALING,
            index2 = idx$HALLMARK_E2F_TARGETS)
(top)HALLMARK_HEDGEHOG_SIGNALING 
 (bottom)HALLMARK_E2F_TARGETS

(top)HALLMARK_HEDGEHOG_SIGNALING (bottom)HALLMARK_E2F_TARGETS

Visualize as a barplot

geom_GeneSet_Barchart(cam.day7_3)

Session info

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Red Hat Enterprise Linux 9.5 (Plow)
## 
## Matrix products: default
## BLAS:   /stornext/System/data/software/rhel/9/base/tools/R/4.4.1/lib64/R/lib/libRblas.so 
## LAPACK: /stornext/System/data/software/rhel/9/base/tools/R/4.4.1/lib64/R/lib/libRlapack.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggthemes_5.1.0              here_1.0.1                 
##  [3] patchwork_1.3.0             viridis_0.6.5              
##  [5] viridisLite_0.4.2           pheatmap_1.0.12            
##  [7] ggrepel_0.9.6               scater_1.32.1              
##  [9] org.Hs.eg.db_3.19.1         AnnotationDbi_1.66.0       
## [11] edgeR_4.2.2                 limma_3.60.6               
## [13] biomaRt_2.60.1              scuttle_1.14.0             
## [15] lubridate_1.9.3             forcats_1.0.0              
## [17] stringr_1.5.1               dplyr_1.1.4                
## [19] purrr_1.0.2                 readr_2.1.5                
## [21] tidyr_1.3.1                 tibble_3.2.1               
## [23] ggplot2_3.5.1               tidyverse_2.0.0            
## [25] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
## [27] Biobase_2.64.0              GenomicRanges_1.56.2       
## [29] GenomeInfoDb_1.40.1         IRanges_2.38.1             
## [31] S4Vectors_0.42.1            BiocGenerics_0.50.0        
## [33] MatrixGenerics_1.16.0       matrixStats_1.4.1          
## 
## loaded via a namespace (and not attached):
##  [1] RColorBrewer_1.1-3        rstudioapi_0.17.0        
##  [3] jsonlite_1.8.9            magrittr_2.0.3           
##  [5] ggbeeswarm_0.7.2          farver_2.1.2             
##  [7] rmarkdown_2.28            zlibbioc_1.50.0          
##  [9] vctrs_0.6.5               memoise_2.0.1            
## [11] DelayedMatrixStats_1.26.0 htmltools_0.5.8.1        
## [13] S4Arrays_1.4.1            progress_1.2.3           
## [15] curl_5.2.3                BiocNeighbors_1.22.0     
## [17] SparseArray_1.4.8         sass_0.4.9               
## [19] bslib_0.8.0               httr2_1.0.5              
## [21] cachem_1.1.0              igraph_2.0.3             
## [23] lifecycle_1.0.4           pkgconfig_2.0.3          
## [25] rsvd_1.0.5                Matrix_1.7-0             
## [27] R6_2.5.1                  fastmap_1.2.0            
## [29] GenomeInfoDbData_1.2.12   digest_0.6.37            
## [31] colorspace_2.1-1          rprojroot_2.0.4          
## [33] dqrng_0.4.1               irlba_2.3.5.1            
## [35] RSQLite_2.3.7             beachmat_2.20.0          
## [37] labeling_0.4.3            filelock_1.0.3           
## [39] fansi_1.0.6               timechange_0.3.0         
## [41] httr_1.4.7                abind_1.4-8              
## [43] compiler_4.4.1            bit64_4.5.2              
## [45] withr_3.0.1               BiocParallel_1.38.0      
## [47] DBI_1.2.3                 highr_0.11               
## [49] rappdirs_0.3.3            DelayedArray_0.30.1      
## [51] bluster_1.14.0            tools_4.4.1              
## [53] vipor_0.4.7               beeswarm_0.4.0           
## [55] glue_1.8.0                cluster_2.1.6            
## [57] generics_0.1.3            gtable_0.3.5             
## [59] tzdb_0.4.0                hms_1.1.3                
## [61] metapod_1.12.0            BiocSingular_1.20.0      
## [63] ScaledMatrix_1.12.0       xml2_1.3.6               
## [65] utf8_1.2.4                XVector_0.44.0           
## [67] pillar_1.9.0              splines_4.4.1            
## [69] BiocFileCache_2.12.0      lattice_0.22-6           
## [71] bit_4.5.0                 tidyselect_1.2.1         
## [73] locfit_1.5-9.10           Biostrings_2.72.1        
## [75] knitr_1.48                gridExtra_2.3            
## [77] xfun_0.48                 statmod_1.5.0            
## [79] stringi_1.8.4             UCSC.utils_1.0.0         
## [81] yaml_2.3.10               evaluate_1.0.1           
## [83] codetools_0.2-20          cli_3.6.3                
## [85] munsell_0.5.1             jquerylib_0.1.4          
## [87] Rcpp_1.0.13               dbplyr_2.5.0             
## [89] png_0.1-8                 parallel_4.4.1           
## [91] blob_1.2.4                prettyunits_1.2.0        
## [93] scran_1.32.0              sparseMatrixStats_1.16.0 
## [95] scales_1.3.0              crayon_1.5.3             
## [97] rlang_1.1.4               KEGGREST_1.44.1